{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Chroma\n",
    "> Chroma is a AI-native open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0.\n",
    "<br>Chroma 是一个 AI 原生的开源向量数据库，专注于提高开发者的生产力和幸福感。Chroma 在 Apache 2.0 下获得许可。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import\n",
    "from langchain_chroma import Chroma\n",
    "from langchain_community.document_loaders import TextLoader\n",
    "from langchain_community.embeddings.sentence_transformer import (\n",
    "    SentenceTransformerEmbeddings,\n",
    ")\n",
    "from langchain_text_splitters import CharacterTextSplitter\n",
    "\n",
    "# load the document and split it into chunks\n",
    "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
    "documents = loader.load()\n",
    "\n",
    "# split it into chunks\n",
    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
    "docs = text_splitter.split_documents(documents)\n",
    "\n",
    "# create the open-source embedding function\n",
    "embedding_function = SentenceTransformerEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
    "\n",
    "# load it into Chroma\n",
    "db = Chroma.from_documents(docs, embedding_function)\n",
    "\n",
    "# query it\n",
    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
    "docs = db.similarity_search(query)\n",
    "\n",
    "# print results\n",
    "print(docs[0].page_content)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "langchain0_1",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
